This document summarizes Rick Gilmore’s analysis of participant sorting data using graph and network analysis tools.
The Jaccard index data are found in analysis/data/jaccard.csv.
jaccard_raw <- readr::read_csv("analysis/data/jaccard.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Exemplar.Row = col_double(),
## Exemplar.Col = col_double(),
## Jaccard = col_double(),
## Group = col_character()
## )
str(jaccard_raw)
## spec_tbl_df [950 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Exemplar.Row: num [1:950] 1 1 1 1 1 1 1 1 1 1 ...
## $ Exemplar.Col: num [1:950] 2 2 2 2 2 3 3 3 3 3 ...
## $ Jaccard : num [1:950] 0.0476 0.1186 0.1228 0.2 0.2692 ...
## $ Group : chr [1:950] "P31M" "P3M1" "P6M" "P6" ...
## - attr(*, "spec")=
## .. cols(
## .. Exemplar.Row = col_double(),
## .. Exemplar.Col = col_double(),
## .. Jaccard = col_double(),
## .. Group = col_character()
## .. )
It’s probably wise to reorder the data frame by wallpaper group, Jaccard index, and exemplar index.
jaccard <- jaccard_raw %>%
dplyr::arrange(., Group, Exemplar.Row, desc(Jaccard))
Let’s add a Jaccard mean and median by Exemplar.Row.
jaccard_aug <- jaccard %>%
dplyr::group_by(., Group, Exemplar.Row) %>%
dplyr::mutate(.,
j_mean = mean(Jaccard),
j_med = median(Jaccard),
j_max = max(Jaccard),
j_min = min(Jaccard)
)
p1 <- jaccard %>%
dplyr::filter(., Group == "P1")
p1_edges <- tibble(from = p1$Exemplar.Row,
to = p1$Exemplar.Col,
weight = p1$Jaccard)
p1_nodes <- tibble(id = 1:20)
p1_network <- network::network(p1_edges, vertex.attr = p1_nodes,
matrix.type = "edgelist", ignore.eval = FALSE,
directed = FALSE)
plot(p1_network, vertex.cex = 3, mode='circle')
Let’s pick the top ten strongest connections.
p1_tidy <- tidygraph::tbl_graph(nodes = p1_nodes, edges = p1_edges,
directed = FALSE)
ggraph::ggraph(p1_tidy) + geom_edge_link() + geom_node_point() + theme_graph()
## Using `stress` as default layout
ggraph(p1_tidy, layout = "graphopt") +
geom_node_point() +
geom_edge_link(aes(width = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id), repel = TRUE) +
labs(edge_width = "Jaccard") +
theme_graph()
ggraph(p1_tidy, layout = "linear") +
geom_edge_arc(aes(width = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
Let’s pick the top two exemplars to plot.
p1_e8 <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 8)
ggraph(p1_e8, layout = "linear") +
geom_edge_arc(aes(width = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
p1_e10 <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 10 | to == 10)
ggraph(p1_e10, layout = "linear") +
geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
p1_e10 <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 10 | to == 10)
ggraph(p1_e10, layout = "graphopt") +
geom_edge_link(aes(width = weight, color = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
p1_selected <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 8 | to == 8)
ggraph(p1_selected, layout = "graphopt") +
geom_edge_link(aes(width = weight, color = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
p1_selected <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 8 | to == 8)
ggraph(p1_selected, layout = "linear") +
geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
p1_selected <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 10 | to == 10)
ggraph(p1_selected, layout = "linear") +
geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
jaccard_aug %>%
dplyr::filter(., Group == "P1") %>%
dplyr::arrange(., desc(j_mean))
## # A tibble: 190 x 8
## # Groups: Group, Exemplar.Row [19]
## Exemplar.Row Exemplar.Col Jaccard Group j_mean j_med j_max j_min
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 19 20 0.32 P1 0.32 0.32 0.32 0.32
## 2 16 20 0.375 P1 0.244 0.26 0.375 0.0820
## 3 16 17 0.32 P1 0.244 0.26 0.375 0.0820
## 4 16 19 0.2 P1 0.244 0.26 0.375 0.0820
## 5 16 18 0.0820 P1 0.244 0.26 0.375 0.0820
## 6 10 16 0.404 P1 0.237 0.222 0.404 0.0820
## 7 10 15 0.347 P1 0.237 0.222 0.404 0.0820
## 8 10 20 0.347 P1 0.237 0.222 0.404 0.0820
## 9 10 19 0.269 P1 0.237 0.222 0.404 0.0820
## 10 10 12 0.222 P1 0.237 0.222 0.404 0.0820
## # … with 180 more rows
It looks like exemplars 19 and 16 are are among the highest.
jaccard_aug %>%
dplyr::filter(., Group == "P1") %>%
dplyr::arrange(., j_mean)
## # A tibble: 190 x 8
## # Groups: Group, Exemplar.Row [19]
## Exemplar.Row Exemplar.Col Jaccard Group j_mean j_med j_max j_min
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 18 19 0.1 P1 0.1 0.1 0.1 0.1
## 2 18 20 0.1 P1 0.1 0.1 0.1 0.1
## 3 11 13 0.226 P1 0.168 0.182 0.226 0.0833
## 4 11 17 0.226 P1 0.168 0.182 0.226 0.0833
## 5 11 14 0.204 P1 0.168 0.182 0.226 0.0833
## 6 11 15 0.182 P1 0.168 0.182 0.226 0.0833
## 7 11 18 0.182 P1 0.168 0.182 0.226 0.0833
## 8 11 20 0.182 P1 0.168 0.182 0.226 0.0833
## 9 11 16 0.121 P1 0.168 0.182 0.226 0.0833
## 10 11 12 0.102 P1 0.168 0.182 0.226 0.0833
## # … with 180 more rows
18 and 11 among the lowest
p1_selected <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 18 | to == 18)
ggraph(p1_selected, layout = "linear") +
geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) +
scale_edge_width(range = c(0.2, 2)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
p1_selected <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 19 | to == 19)
g <- ggraph(p1_selected, layout = "linear") +
geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) +
scale_edge_width(range = c(0.1, 4), limits = c(0, .6)) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
g
p1_selected <- p1_tidy %>%
activate(edges) %>%
dplyr::filter(., from == 19 | to == 19)
p1_selected <- p1_selected %>%
dplyr::mutate(weight = cut(weight, c(0, .1, .2, .3, .4, .5, .6)))
g <- ggraph(p1_selected, layout = "linear", circular = TRUE) +
geom_edge_arc(aes(color = factor(weight))) +
geom_node_text(aes(label = id)) +
labs(edge_width = "Jaccard") +
theme_graph()
g
plot_jaccard_vals <- function(df, exemplar_id, group, mean_j = NA) {
df <- df %>%
activate(edges) %>%
dplyr::filter(., from == exemplar_id | to == exemplar_id) %>%
dplyr::mutate(weight = cut(weight, c(0, .1, .2, .3, .4, .5, .6), labels = c("<.1", ".1-.2", ".2-.3", ".3-.4", ".4-.5", ".5-.6")))
ggraph(df, layout = "linear", circular = TRUE) +
geom_edge_arc(aes(color = weight)) +
geom_node_text(aes(label = id)) +
ggtitle(paste0(group, " | Exemplar ", exemplar_id, " | mean Jaccard ", mean_j)) +
theme_graph()
}
plot_jaccard_vals(p1_tidy, 11, "P1")
plot_jaccard_vals(p1_tidy, 19, "P1")
plot_jaccard_vals(p1_tidy, 16, "P1")
plot_jaccard_vals(p1_tidy, 18, "P1")
plot_jaccard_vals(p1_tidy, 11, "P1")
wp_graph <- function(df, group) {
out_df <- df %>%
dplyr::filter(., Group == group)
df_edges <- tibble(from = out_df$Exemplar.Row,
to = out_df$Exemplar.Col,
weight = out_df$Jaccard)
df_nodes <- tibble(id = 1:20)
tidygraph::tbl_graph(nodes = df_nodes,
edges = df_edges,
directed = FALSE)
}
jaccard_stats <- function(jaccard) {
jaccard %>%
dplyr::group_by(., Group, Exemplar.Row) %>%
dplyr::mutate(.,
j_mean = mean(Jaccard),
j_med = median(Jaccard),
j_max = max(Jaccard),
j_min = min(Jaccard)
) %>%
dplyr::summarise(.,
Jaccard_mean = mean(j_mean),
Jaccard_med = mean(j_med),
Jaccard_max = mean(j_max),
Jaccard_min = mean(j_min) )
}
graph <- wp_graph(jaccard, "P31M")
j_stats <- jaccard_stats(jaccard_raw)
## `summarise()` has grouped output by 'Group'. You can override using the `.groups` argument.
plot_jaccard_vals(graph, 18, "P31M")
pick_extreme_mean_exemplars <- function(j_stats, group, hi_lo = "hi", n_exemplars = 1) {
this_group <- j_stats %>%
dplyr::filter(., Group == group)
if (hi_lo == "hi") {
this_group <- this_group %>%
dplyr::arrange(., desc(Jaccard_mean))
} else {
this_group <- this_group %>%
dplyr::arrange(., Jaccard_mean)
}
this_group$Exemplar.Row[1:n_exemplars]
}
pick_extreme_max_exemplars <- function(j_stats, group, hi_lo = "hi", n_exemplars = 1) {
this_group <- j_stats %>%
dplyr::filter(., Group == group)
if (hi_lo == "hi") {
this_group <- this_group %>%
dplyr::arrange(., desc(Jaccard_max))
} else {
this_group <- this_group %>%
dplyr::arrange(., Jaccard_max)
}
this_group$Exemplar.Row[1:n_exemplars]
}
pick_extreme_min_exemplars <- function(j_stats, group, hi_lo = "hi", n_exemplars = 1) {
this_group <- j_stats %>%
dplyr::filter(., Group == group)
if (hi_lo == "hi") {
this_group <- this_group %>%
dplyr::arrange(., desc(Jaccard_min))
} else {
this_group <- this_group %>%
dplyr::arrange(., Jaccard_min)
}
this_group$Exemplar.Row[1:n_exemplars]
}
jaccard <- jaccard_raw %>%
dplyr::arrange(., Group, Exemplar.Row, desc(Jaccard))
j_stats <- jaccard_stats(jaccard_raw)
## `summarise()` has grouped output by 'Group'. You can override using the `.groups` argument.
this_group = "P31M"
graph <- wp_graph(jaccard, this_group)
hi_1_max <- pick_extreme_max_exemplars(j_stats, this_group, hi_lo = "hi")
lo_1_max <- pick_extreme_max_exemplars(j_stats, this_group, hi_lo = "lo")
(hi_max_j <- j_stats$Jaccard_mean[hi_1_max])
## [1] 0.190652
(lo_max_j <- j_stats$Jaccard_mean[lo_1_max])
## [1] 0.32
plot_jaccard_vals(graph, hi_1_max, this_group, hi_max_j)
plot_jaccard_vals(graph, lo_1_max, this_group, lo_max_j)